In [1]:
import networkx as nx
import math
import numpy as np

In [2]:
from IPython.display import Javascript

Load data


In [3]:
lastnode = 5000

In [4]:
datafile = open('/var/datasets/wdc/small-pld-arc')

G = nx.DiGraph()

for line in datafile:
    ijstr = line.split('\t')
    
    i=int(ijstr[0])
    j=int(ijstr[1])
    
    if i>lastnode:
        break
    if j>lastnode:
        continue
    G.add_edge(i,j)
    
datafile.close()
Gorig = G.copy()

In [5]:
indexfile = open('/var/datasets/wdc/small-pld-index')
index = {}

for line in indexfile:
    namei = line.split('\t')
    
    name=namei[0]
    i=int(namei[1])
    
    if i>lastnode:
        break

    index[i]=name
    
indexfile.close()

In [6]:
def cleanupgraph(G):
    comp = nx.weakly_connected_components(G.copy())
    for c in comp:
        if len(c)<4:
            G.remove_nodes_from(c)

In [7]:
def graphcleanup(G):
    for (node, deg) in G.degree_iter():
            if deg==0:
                G.remove_node(node)
            elif deg==1:
                if G.degree((G.predecessors(node) + G.successors(node))[0]) == 1:
                    G.remove_node(node)
            elif deg==2 and G.in_degree(node)==1:
                if (G.predecessors(node) == G.successors(node)) and G.degree((G.predecessors(node) + G.successors(node))[0]) == 2:
                    G.remove_node(node)

In [8]:
cleanupgraph(G)

In [9]:
G.size()


Out[9]:
232

In [10]:
Gorig.number_of_nodes()


Out[10]:
402

In [11]:
Gorig.size()


Out[11]:
409

Convert to Javascript for interactivity


In [12]:
#from IPython.core.display import display_javascript
import json
from networkx.readwrite import json_graph

In [13]:
d = json_graph.node_link_data(G)
for node in d['nodes']:
    node['name']=node['id']
    node['value']=G.degree(node['id'])
    if True:
        node['group'] = node['id'] % 4
    else:
        if node['id']<10:
            node['group']=0#node['id'] % 4
        else:
            node['group']=1#node['id'] % 4
        
d['adjacency'] = json_graph.adjacency_data(G)['adjacency']
json.dump(d, open('rwgraph.json','w'))

In [37]:
%%html
<div id="d3-example"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
.link {stroke: #999; stroke-opacity: .3;}
</style>
<script src="randomwalk.js"></script>



In [17]:
Javascript(filename='force.js')


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-17-76c543eb261d> in <module>()
----> 1 Javascript(filename='force.js')

/Users/migish/anaconda/lib/python2.7/site-packages/IPython/core/display.pyc in __init__(self, data, url, filename, lib, css)
    589         self.lib = lib
    590         self.css = css
--> 591         super(Javascript, self).__init__(data=data, url=url, filename=filename)
    592 
    593     def _repr_javascript_(self):

/Users/migish/anaconda/lib/python2.7/site-packages/IPython/core/display.pyc in __init__(self, data, url, filename)
    384         self.filename = None if filename is None else unicode_type(filename)
    385 
--> 386         self.reload()
    387         self._check_data()
    388 

/Users/migish/anaconda/lib/python2.7/site-packages/IPython/core/display.pyc in reload(self)
    402         """Reload the raw data from file or URL."""
    403         if self.filename is not None:
--> 404             with open(self.filename, self._read_flags) as f:
    405                 self.data = f.read()
    406         elif self.url is not None:

IOError: [Errno 2] No such file or directory: u'force.js'

In [50]:


In [19]:
L = nx.linalg.laplacianmatrix.directed_laplacian_matrix(G)
Linv = np.linalg.inv(L)

In [20]:
L.shape


Out[20]:
(138, 138)

In [21]:
n = L.shape[0]
Reff = np.zeros((n,n))

In [22]:
Gsparse = G.copy()

In [23]:
graphcleanup(Gsparse)

In [24]:
nodelookup={Gsparse.nodes()[idx]:idx for idx in range(len(Gsparse.nodes()))}

In [25]:
edge = np.zeros((n,1))
for (i,j) in Gsparse.edges_iter():
    edge[nodelookup[i]] = 1
    edge[nodelookup[j]] = -1
    Reff[nodelookup[i],nodelookup[j]] = edge.T.dot(Linv.dot(edge))
    edge[[nodelookup[i]]] = 0
    edge[[nodelookup[j]]] = 0

In [26]:
ReffAbs=np.abs(Reff)+np.abs(Reff.T)

If you call

arr.argsort()[:3] It will give you the indices of the 3 smallest elements.

array([0, 2, 1], dtype=int64) So, for n, you should call

arr.argsort()[:n]


In [27]:
res = ReffAbs.reshape(n**2)
argp = np.argpartition(res,n**2-n)

In [28]:
mask = (ReffAbs < res[argp[-int(0.5*Gsparse.number_of_nodes())]]) & (ReffAbs >0)
for (i,j) in Gsparse.edges():
    if mask[nodelookup[i],nodelookup[j]]:
        Gsparse.remove_edge(i,j)

In [29]:
cleanupgraph(Gsparse)

In [30]:
d = json_graph.node_link_data(Gsparse)
for node in d['nodes']:
    node['name']=index[node['id']]
    node['value']=Gsparse.degree(node['id'])
    node['group']=index[node['id']][-3:]

json.dump(d, open('graph.json','w'))

In [31]:
Gorig.number_of_edges()


Out[31]:
409

In [32]:
Gsparse.number_of_edges()


Out[32]:
17

In [33]:
Gsparse.number_of_nodes()


Out[33]:
16

In [ ]:


In [34]:
GsparseAdj = nx.linalg.adjacency_matrix(Gorig).toarray()
GsparseAdj = nx.to_numpy_matrix(Gorig)
GsparseAdj[ReffAbs < res[argp[-300]]] = 0
Gsparse = nx.from_numpy_matrix?
Gsparse = nx.from_numpy_matrix
Gsparse = nx.from_numpy_matrix(GsparseAdj, create_using=nx.DiGraph())

In [ ]:
Gsparse = nx.from_numpy_matrix

In [35]:
edge = np.zeros((n,1))
for i in range(n):
    if i % int(math.ceil((float(10)/100)*n)) == 0: 
        print int(math.floor(100*float(i)/n)), '%'
        
    edge[i] = 1

    for j in range(i+1, n):
        edge[j] = -1
        Reff[i,j] = edge.T.dot(Linv.dot(edge))
        edge[j] = 0
        
    edge[i] = 0


0 %
10 %
20 %
30 %
40 %
50 %
60 %
71 %
81 %
91 %

In [ ]: